# Image directories used throughout the notebook.
# NOTE(review): the edge/seg/feature paths are currently stubbed to '../';
# the intended locations survive in the trailing comments — confirm which
# is wanted before the corresponding sections run.
intro_images_path='./Images/Intro_images/'
edge_images_path='../'#../Images/Edge_images/'
seg_images_path='../'#../Images/Seg_images/'
feature_images_path='../'#../Images/Feature_images/'
output_path='./Images/Outputs/'
print('Image paths ....')
Image paths ....
Image Processing¶
- Arithmetic and logical operation
- Histogram equalization
- Convolution
- Correlation
- Image pyramids
- Template matching
Image difference¶
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Image differencing: highlight content that changed between a static
# background frame and a foreground frame (both loaded as grayscale).
bg = cv2.imread(intro_images_path+'background.png',0)
fg = cv2.imread(intro_images_path+'foreground.png',0)
# Fix: cv2.subtract saturates negative uint8 differences to 0, so the
# old np.absolute(cv2.subtract(fg, bg)) was a no-op wrapper and any
# motion darker than the background was silently lost. cv2.absdiff
# computes |fg - bg| correctly in both directions.
motion = cv2.absdiff(fg, bg)
plt.subplot(131), plt.imshow(fg, 'gray')
plt.xticks([]), plt.yticks([])
plt.subplot(132), plt.imshow(bg,'gray')
plt.xticks([]), plt.yticks([])
plt.subplot(133), plt.imshow(motion, 'gray')
plt.xticks([]), plt.yticks([])
#plt.subplot(224), plt.plot(hist_full)
plt.show()
Exercise Histogram Equalization in OpenCV¶
https://docs.opencv.org/3.4.2/d5/daf/tutorial_py_histogram_equalization.html
OpenCV has a function to do that, cv2.equalizeHist(). Its input is just a grayscale image and the output is the histogram-equalized image. You can take different images with different light conditions, equalize them and check the results.
Histogram equalization works well when the histogram of an image is confined to a particular intensity region. It does not work well on images where both very bright and very dark pixels are present. In those cases an adaptive equalization is more convenient.
# Global histogram equalization (cv2.equalizeHist) demo on a grayscale image.
img = cv2.imread(intro_images_path+ 'Picture1.jpg',0)
plt.imshow(img,'gray')
plt.xticks([]), plt.yticks([])
plt.show()
# Spread the intensity histogram over the full 0-255 range.
equ = cv2.equalizeHist(img)
#res = np.hstack((img,equ)) #stacking images side-by-side
plt.imshow(equ,'gray')
plt.xticks([]), plt.yticks([])
plt.show()
# Side-by-side: original vs. equalized, each next to its normalized histogram.
# NOTE(review): figsize expects (width, height) while shape is (rows, cols),
# so the proportions here are transposed — confirm intended figure shape.
plt.figure(figsize= (0.05*img.shape[0],0.05*img.shape[1]))
plt.subplot(421), plt.imshow(img, 'gray')
plt.xticks([]), plt.yticks([])
plt.subplot(423), plt.imshow(equ,'gray')
plt.xticks([]), plt.yticks([])
# 256-bin grayscale histogram, normalized by total pixel count.
hist = cv2.calcHist([img],[0],None,[256],[0,256])
hist_norm=hist/img.size
plt.subplot(422), plt.plot(hist_norm)
hist = cv2.calcHist([equ],[0],None,[256],[0,256])
hist_norm=hist/equ.size
plt.subplot(424), plt.plot(hist_norm)
plt.show()
# Adaptive equalization (CLAHE): equalizes 16x16 tiles independently,
# clipping each tile's histogram at 10.0 to limit noise amplification.
# NOTE(review): subplots 425-428 on a fresh figure leave the top half of
# the 4x2 grid empty — likely copied from the previous figure's layout.
plt.figure(figsize= (0.05*img.shape[0],0.05*img.shape[1]))
plt.subplot(425), plt.imshow(img, 'gray')
plt.xticks([]), plt.yticks([])
hist = cv2.calcHist([img],[0],None,[256],[0,256])
hist_norm=hist/img.size
plt.subplot(426), plt.plot(hist_norm)
#Adaptive histogram equalization
clahe = cv2.createCLAHE(clipLimit=10.0, tileGridSize=(16,16))
cl1 = clahe.apply(img)
plt.subplot(427), plt.imshow(cl1,'gray')
plt.xticks([]), plt.yticks([])
hist = cv2.calcHist([cl1],[0],None,[256],[0,256])
hist_norm=hist/cl1.size
plt.subplot(428), plt.plot(hist_norm)
plt.show()
Convolution and Correlation¶
Convolution filtering¶
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Sharpening with a 3x3 kernel: center weight 5, cross of -1s (sums to 1,
# so overall brightness is preserved while edges are boosted).
img = cv2.imread(intro_images_path+'pazo.jpg',0)
kernel = np.array([[0, -1, 0], [-1, 5, -1],[0, -1,0]])
# filter2D computes correlation; for this symmetric kernel the result is
# identical to convolution. NOTE(review): ddepth is passed as 0 rather
# than the conventional -1 (same depth as source) — confirm intent.
dst = cv2.filter2D(img,0,kernel)
plt.figure(figsize= (0.05*img.shape[0],0.05*img.shape[1]))
plt.subplot(121),plt.imshow(img,'gray'), plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(dst,'gray'), plt.title('Filtered')
plt.xticks([]), plt.yticks([])
plt.show()
#cv2.imwrite('pazo.jpg',img)
Convolution for image smoothing (blurring)¶
Image blurring is achieved by convolving the image with a low-pass filter kernel that removes high-frequency content (e.g. noise, edges). Edges are blurred a little bit in this operation. There are several blurring techniques:
*Mean filtering:*
The idea of mean filtering is simply to replace each pixel value in an image with the mean value of its neighbors, including itself. This is done by convolving the image with a normalized box filter. OpenCV provides the function cv.blur(), which takes the width and height of the kernel as parameters. A 3x3 normalized box filter would look like the below:
K=[[1/9 1/9 1/9],
[1/9 1/9 1/9],
[1/9 1/9 1/9]]
*Gaussian average*
In the Gaussian blur operation, the image is convolved with a Gaussian filter instead of the box filter. OpenCV provides the GaussianBlur(src, dst, ksize, sigma) method, which accepts the following parameters −
src − input image
dst − output image
ksize − size of the kernel.
sigmaX − Gaussian kernel standard deviation in the X direction (if the Y standard deviation is zero or omitted, it is taken to be equal to sigmaX).
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Smoothing comparison on a color photo: 15x15 Gaussian blur vs. 15x15
# box (mean) blur.
img = cv2.imread(intro_images_path+'zebra.jpg')
blur1 = cv2.GaussianBlur(img,(15,15),0)   # sigma=0: derived from kernel size
blur2 = cv2.blur(img,(15,15))             # normalized box filter
plt.figure(figsize= (0.05*img.shape[0],0.05*img.shape[1]))
# Fix: OpenCV loads color images in BGR order, but matplotlib interprets
# arrays as RGB, so the images previously displayed with red and blue
# channels swapped. Convert only for display.
plt.subplot(131),plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(132),plt.imshow(cv2.cvtColor(blur1, cv2.COLOR_BGR2RGB)),plt.title('GaussBlurr')
plt.xticks([]), plt.yticks([])
plt.subplot(133),plt.imshow(cv2.cvtColor(blur2, cv2.COLOR_BGR2RGB)),plt.title('Blurr')
plt.xticks([]), plt.yticks([])
plt.show()
# imwrite expects BGR, so the unconverted arrays are written directly.
cv2.imwrite('zebragb.jpg',blur1)
cv2.imwrite('zebrab.jpg',blur2)
# Same blur comparison on a synthetic binary mask: four white 70x70
# squares on black make the edge-softening behavior easy to see.
img = cv2.imread(intro_images_path+'gate.jpg')  # only used for its size below
img2 = np.zeros(img.shape[:2], np.uint8)
img2[100:170, 100:170] = 255
img2[180:250, 100:170] = 255
img2[100:170, 200:270] = 255
img2[180:250, 200:270] = 255
blur1 = cv2.GaussianBlur(img2,(15,15),0)
blur2 = cv2.blur(img2,(15,15))
plt.figure(figsize= (0.05*img2.shape[0],0.05*img2.shape[1]))
plt.subplot(131),plt.imshow(img2,'gray'),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(132),plt.imshow(blur1,'gray'),plt.title('GaussBlurr')
plt.xticks([]), plt.yticks([])
plt.subplot(133),plt.imshow(blur2,'gray'),plt.title('Blurr')
plt.xticks([]), plt.yticks([])
plt.show()
Correlation filtering¶
Exercise: Template Matching https://docs.opencv.org/3.4.2/d4/dc6/tutorial_py_template_matching.html
Template Matching is a method to find certain image content (template) in another image. OpenCV comes with a function cv2.matchTemplate() for this purpose. It simply slides the template image (T) over the input image (I), as in 2D correlation, and compares the template against each patch of the input image to give the image (R). Several comparison methods are implemented in OpenCV. They return a grayscale image (R), where each pixel denotes how much does the neighbourhood of that pixel match with template.
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Template matching demo: locate a small template (an eye) inside a
# larger grayscale image with each of OpenCV's six comparison methods.
img = cv2.imread(intro_images_path+'einstein.jpg',0)
img2 = img.copy()
template = cv2.imread(intro_images_path+'einstein_eye.jpg',0)
w, h = template.shape[::-1]  # shape is (rows, cols) -> reversed gives (w, h)
# All the 6 methods for comparison in a list
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
            'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
for meth in methods:
    img = img2.copy()
    # Fix: resolve the constant with getattr instead of eval() — eval is
    # unnecessary here and a dangerous habit around string input.
    method = getattr(cv2, meth.split('.', 1)[1])
    # Apply template Matching
    res = cv2.matchTemplate(img,template,method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)
    # Promote to 3 channels so the green rectangle is visible.
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    cv2.rectangle(img,top_left, bottom_right, [0,255,0], 2)
    plt.subplot(121),plt.imshow(res,cmap = 'gray')
    plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
    plt.subplot(122),plt.imshow(img,cmap = 'gray')
    plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
    plt.suptitle(meth)
    plt.show()
    if method in [cv2.TM_CCOEFF_NORMED]:
        # Stretch the normalized response map to 0-255 before saving.
        res=(res-min_val)/(max_val-min_val)*255
        cv2.imwrite('einstein_ccoeff_normed.jpg',res)
Image Pyramids¶
An image pyramid is a collection of images usually obtained by downsampling a single original image. It is a set of layers in which the higher the layer, the smaller the size.
Image pyramids are used to achieve scale-invariant processing in contexts such as the following:
- template matching
- interest point detection
- object detection
Gaussian pyramid¶
The basic steps in building a Gaussian image pyramid are:
- Gaussian smooth the image
- Reduce image dimensions by half by discarding every other row and and every other column
- Repeat this process until the desired number of levels is achieved or the image size reaches a minimum
Exercise¶
Comparison between pyramids with and without smoothing prior to downscaling
# !pip install scikit-image
# !conda install -c conda-forge scikit-image -y
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage import data
def pyramid_bf(image,l=5, minSizex=8, minSizey=8):
    """Brute-force image pyramid: discard every other row and column,
    with no smoothing before decimation (so aliasing is expected).

    Yields the original image first, then up to `l` halved versions,
    stopping early once either dimension drops below the minimum.
    """
    current = image
    yield current
    for _ in range(l):
        rows, cols = current.shape[:2]
        # Stop before producing a level smaller than the allowed minimum.
        if rows < minSizex or cols < minSizey:
            break
        current = current[::2, ::2]
        yield current
def pyramid_sm(image,l=5, minSizex=8, minSizey=8):
    """Gaussian pyramid: blur with a 5x5 Gaussian kernel before each
    2x decimation, suppressing the aliasing that pyramid_bf exhibits.

    Yields the original image first, then up to `l` reduced levels,
    stopping early once either dimension drops below the minimum.
    """
    current = image
    yield current
    for _ in range(l):
        rows, cols = current.shape[:2]
        if rows < minSizex or cols < minSizey:
            break
        # Low-pass filter first, then keep every other row/column.
        smoothed = cv2.GaussianBlur(current, (5, 5), 0)
        current = smoothed[::2, ::2]
        yield current
# Build and display the brute-force pyramid level by level.
image = cv2.imread(intro_images_path + 'einstein.jpg',0)#'aliasing.jpg',0)#
for layer in pyramid_bf(image,l=3):
    print(layer.shape)
    plt.figure(figsize= (0.02*layer.shape[0],0.02*layer.shape[1]))
    plt.imshow(layer,cmap='gray')
    plt.xticks([]), plt.yticks([])
    plt.show()
# Show the last (smallest) layer again at the original figure size —
# presumably to magnify the artifacts of naive downsampling.
plt.figure(figsize= (0.02*image.shape[0],0.02*image.shape[1]))
plt.imshow(layer,cmap='gray')
plt.xticks([]), plt.yticks([])
plt.show()
# Same display, but using the Gaussian-smoothed pyramid for comparison.
for layer in pyramid_sm(image, l=3):
    print(layer.shape)
    plt.figure(figsize= (0.02*layer.shape[0],0.02*layer.shape[1]))
    plt.imshow(layer,cmap='gray')
    plt.xticks([]), plt.yticks([])
    plt.show()
plt.figure(figsize= (0.02*image.shape[0],0.02*image.shape[1]))
plt.imshow(layer,cmap='gray')
plt.xticks([]), plt.yticks([])
plt.show()
(491, 383)
(246, 192)
(123, 96)
(62, 48)
(491, 383)
(246, 192)
(123, 96)
(62, 48)
Opencv function
OpenCV has a specific function to downscale images:
def pyramid(layer, l=4, minSizex=8, minSizey=8):
    """Image pyramid built with OpenCV's cv2.pyrDown (Gaussian blur +
    2x decimation in one call).

    Yields the original image first, then up to `l` reduced levels,
    stopping early if a level would fall below (minSizex, minSizey).

    Fixes vs. the original: removed a stray debug print of the first
    layer's shape that polluted the caller's output, and the level
    check now runs before cv2.pyrDown so the final, discarded
    downsample is no longer computed.
    """
    level = 0
    yield layer
    while True:
        # Stop once the requested number of reductions has been yielded.
        if level == l:
            break
        layer = cv2.pyrDown(layer)
        # Stop if the next level is below the minimum size.
        if layer.shape[0] < minSizex or layer.shape[1] < minSizey:
            break
        yield layer
        level = level + 1
# Display the cv2.pyrDown pyramid the same way as the manual versions.
for layer in pyramid(image, l=3):
    print(layer.shape)
    plt.figure(figsize= (0.02*layer.shape[0],0.02*layer.shape[1]))
    plt.imshow(layer,cmap='gray')
    plt.xticks([]), plt.yticks([])
    plt.show()
# Re-display the last (smallest) layer at the original figure size.
plt.figure(figsize= (0.02*image.shape[0],0.02*image.shape[1]))
plt.imshow(layer,cmap='gray')
plt.xticks([]), plt.yticks([])
plt.show()
(491, 383)
(491, 383) (246, 192)
(123, 96)
(62, 48)
Example of application: Template Matching¶
https://docs.opencv.org/3.4.2/d4/dc6/tutorial_py_template_matching.html
Template Matching is a method for searching and finding the location of a template image in a larger image. In order to cope with different sizes of the occurrence of the template we can take advantage of image pyramid.
EXERCISE: Template matching and Gaussian pyramid¶
Based on the exercises about template matching and the construction of Gaussian pyramids, try to detect as many tunas as possible in the image Tuna_Relative_Sizes.jpg, given the template in Tuna_template.jpg.
# Example 1: Template Pyramid (Fixed Image Scale) without NMS
#
# This example demonstrates object detection by scaling the template
# (creating a template pyramid) and matching it against the original,
# fixed-size image.
#
# Non-Max Suppression (NMS) is intentionally omitted. As a result,
# a single object may generate multiple, overlapping detections
# at different scales or positions.
#
# Note: Various detection thresholds were evaluated. The threshold
# selected represents a deliberate balance between detection efficacy
# and minimizing noise (false positives).
import cv2
import numpy as np
import matplotlib.pyplot as plt
def build_template_pyramid(template, scales=(0.5, 0.75, 1.0, 1.5, 2.0)):
    """
    Builds a template pyramid at different scales.

    For scale 1.0 the original template object is reused; other scales
    are produced with cv2.resize and skipped if the resulting size would
    be zero in either dimension.

    Fix: the default `scales` was a mutable list; a shared mutable
    default is a classic Python pitfall, so it is now a tuple.

    RETURNS: the pyramid and the scales that were successfully generated.
    """
    pyramid = []
    valid_scales = []  # only the scales that produced a usable template
    for scale in scales:
        if scale == 1.0:
            pyramid.append(template)
            valid_scales.append(scale)
            continue
        new_width = int(template.shape[1] * scale)
        new_height = int(template.shape[0] * scale)
        # Only add if the resulting size is valid
        if new_width > 0 and new_height > 0:
            pyramid.append(cv2.resize(template, (new_width, new_height)))
            valid_scales.append(scale)
    return pyramid, valid_scales
def detect_tunas_corrected(main_image, template, threshold=0.6):
    """
    Detection using multiple template scales against a fixed-size image.

    Parameters: `main_image` and `template` may be BGR or grayscale;
    both are converted to grayscale before matching. `threshold` is the
    minimum TM_CCOEFF_NORMED response to count as a detection.

    Returns a list of raw (x, y, w, h, score, scale) tuples, where (w, h)
    is the size of the *scaled* template that fired; no NMS is applied.

    Cleanup: removed the unused `original_w`/`original_h` locals and
    replaced index-based pyramid iteration with zip over the pyramid and
    its valid scales.
    """
    # Convert to grayscale only when a 3-channel image is supplied.
    if len(main_image.shape) == 3:
        main_gray = cv2.cvtColor(main_image, cv2.COLOR_BGR2GRAY)
    else:
        main_gray = main_image
    if len(template.shape) == 3:
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    else:
        template_gray = template
    # Scales to detect tuna of different sizes
    scales = [0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.1, 1.25, 1.5, 1.75, 2.0]
    template_pyramid, valid_scales = build_template_pyramid(template_gray, scales)
    detections = []
    for scaled_template, scale in zip(template_pyramid, valid_scales):
        template_h, template_w = scaled_template.shape
        # Template matching with scaled template
        result = cv2.matchTemplate(main_gray, scaled_template, cv2.TM_CCOEFF_NORMED)
        # Keep every location whose response clears the threshold.
        locations = np.where(result >= threshold)
        for x, y in zip(*locations[::-1]):  # np.where returns (rows, cols)
            score = result[y, x]
            detections.append((x, y, template_w, template_h, score, scale))
    return detections
def non_max_suppression(detections, overlap_threshold=0.3):
    """Removes overlapping detections (greedy NMS).

    `detections` holds (x, y, w, h, score, scale) tuples. Boxes are
    processed in descending score order; a box is suppressed when its
    intersection with an already-kept box exceeds `overlap_threshold`
    of the suppressed box's own area (same criterion as the original).

    Returns the surviving detections, sorted by descending score.

    Bug fixed: the original sorted the box array by score but then
    indexed the *unsorted* `detections` list with post-sort indices,
    returning the wrong entries whenever the input was not already
    score-ordered. We now keep the sort permutation and map picked
    indices back through it.
    """
    if len(detections) == 0:
        return []
    boxes = np.array([[x, y, x + w, y + h, score]
                      for (x, y, w, h, score, _scale) in detections],
                     dtype=float)
    # Permutation that sorts detections by descending score.
    order = np.argsort(boxes[:, 4])[::-1]
    boxes = boxes[order]
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.arange(len(boxes))
    pick = []
    while len(idxs) > 0:
        i = idxs[0]
        pick.append(i)
        # Intersection of the current best box with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[idxs[1:]])
        yy1 = np.maximum(y1[i], y1[idxs[1:]])
        xx2 = np.minimum(x2[i], x2[idxs[1:]])
        yy2 = np.minimum(y2[i], y2[idxs[1:]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[1:]]
        idxs = idxs[1:][overlap <= overlap_threshold]
    # Map sorted-order picks back to the caller's original entries.
    return [detections[order[i]] for i in pick]
def draw_detections_with_scales(image, detections):
    """Return a copy of `image` with one rectangle per detection.

    Each box is colour-coded by the template scale that produced it and
    labelled with that scale. `detections` holds (x, y, w, h, score,
    scale) tuples; the input image is not modified.
    """
    annotated = image.copy()
    # Promote grayscale input to 3 channels so colored boxes can be drawn.
    if len(annotated.shape) == 2:
        annotated = cv2.cvtColor(annotated, cv2.COLOR_GRAY2BGR)
    # Palette keyed by template scale.
    palette = {
        0.4: (255, 0, 0),
        0.5: (255, 128, 0),
        0.6: (255, 255, 0),
        0.75: (128, 255, 0),
        0.9: (0, 255, 0),
        1.0: (0, 255, 128),
        1.1: (0, 255, 255),
        1.25: (0, 128, 255),
        1.5: (0, 0, 255),
        1.75: (128, 0, 255),
        2.0: (255, 0, 255)
    }
    for x, y, w, h, score, scale in detections:
        # Use the palette entry whose scale is nearest to this detection's.
        nearest = min(palette, key=lambda s: abs(s - scale))
        box_color = palette[nearest]
        cv2.rectangle(annotated, (x, y), (x + w, y + h), box_color, 2)
        cv2.putText(annotated, f'S:{scale}', (x, y-5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, box_color, 1)
    return annotated
# --- Image Loading ---
try:
    # Make sure these paths are correct
    main_image = cv2.imread('./Images/Intro_images/Tuna_Relative_Sizes.jpg')
    template = cv2.imread('./Images/Intro_images/Tuna_template.jpg')
    # cv2.imread returns None (no exception) on a missing file.
    if main_image is None or template is None:
        raise FileNotFoundError("Could not load images")
except Exception as e:
    print(f"Error loading images: {e}")
    # Create sample data for demonstration: a flat gray scene and a
    # brighter flat template, so the rest of the script still runs.
    main_image = np.ones((400, 600, 3), dtype=np.uint8) * 100
    template = np.ones((40, 60, 3), dtype=np.uint8) * 200
# --- Detection ---
print("Detecting tuna with multiple template scales...")
# threshold=0.475 was chosen empirically (see the note at the top of
# this example about balancing detections vs. false positives).
detections = detect_tunas_corrected(main_image, template, threshold=0.475)
filtered_detections = non_max_suppression(detections)
print(f"Found {len(filtered_detections)} tuna")
# --- SEPARATE VISUALIZATIONS ---
# 1. Original Image (OpenCV loads BGR; convert to RGB for matplotlib)
plt.figure(figsize=(12, 8))
plt.imshow(cv2.cvtColor(main_image, cv2.COLOR_BGR2RGB))
plt.title('Original Image - Tuna Relative Sizes')
plt.axis('off')
plt.show()
# 2. Original Template
plt.figure(figsize=(6, 4))
plt.imshow(cv2.cvtColor(template, cv2.COLOR_BGR2RGB))
plt.title('Original Tuna Template')
plt.axis('off')
plt.show()
# 3. Image with Detections, colour-coded by template scale
plt.figure(figsize=(12, 8))
result_image = draw_detections_with_scales(main_image, filtered_detections)
plt.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
plt.title(f'Multi-scale Detections ({len(filtered_detections)} tuna found)')
plt.axis('off')
plt.show()
# ==============================================================================
# 4. Templates at Different Scales
# ==============================================================================
print("Showing template pyramid (with relative sizes)...")
scales_to_show = [0.5, 0.75, 1.0, 1.5, 2.0]
# Build the pyramid using the COLOR template
template_pyramid, scales_display = build_template_pyramid(
    template,
    scales=scales_to_show
)
# Calculate canvas size: tall enough for the biggest template, wide
# enough for all templates laid side by side with padding between them.
max_h = 0
total_w = 0
padding = 30  # Space between images
for tpl in template_pyramid:
    max_h = max(max_h, tpl.shape[0])
    total_w += tpl.shape[1] + padding
# Add space for text below
canvas_h = max_h + 60
canvas = np.zeros((canvas_h, total_w, 3), dtype=np.uint8)  # Black canvas
current_x = padding // 2
for i, tpl in enumerate(template_pyramid):
    h, w, _ = tpl.shape  # assumes a 3-channel (color) template
    # Calculate where to paste the image to align it at the bottom
    y_offset = max_h - h
    canvas[y_offset : y_offset + h, current_x : current_x + w] = tpl
    # Add scale text, centered under each template
    scale_text = f"Scale: {scales_display[i]}"
    (text_w, text_h), _ = cv2.getTextSize(scale_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    text_x = current_x + (w - text_w) // 2  # Center text
    cv2.putText(canvas, scale_text, (text_x, max_h + 35),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    # Move the X cursor for the next image
    current_x += w + padding
# Show the combined canvas
plt.figure(figsize=(15, 5))  # Adjust size to fit
plt.imshow(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
plt.title('Template Pyramid - Real Relative Sizes')
plt.axis('off')
plt.show()
# ==============================================================================
# END OF SECTION 4
# ==============================================================================
# 5. Color Legend — one swatch per representative template scale.
plt.figure(figsize=(8, 6))
legend = np.zeros((300, 500, 3), dtype=np.uint8)
# (scale, label, color) — colors must match the palette used by
# draw_detections_with_scales. Fix: the 0.75 entry previously showed
# (0, 255, 0), which is that palette's color for scale 0.9; the palette
# maps 0.75 to (128, 255, 0).
scales_colors = [
    (0.4, "Very small", (255, 0, 0)),
    (0.5, "Small", (255, 128, 0)),
    (0.75, "Medium-small", (128, 255, 0)),
    (1.0, "Original size", (0, 255, 128)),
    (1.5, "Medium-large", (0, 0, 255)),
    (2.0, "Very large", (255, 0, 255))
]
for i, (scale, label, color) in enumerate(scales_colors):
    y = 40 + i * 40  # one 25px swatch per row, 40px row pitch
    cv2.rectangle(legend, (20, y), (60, y+25), color, -1)
    cv2.putText(legend, f"{label} (scale {scale})", (80, y+18),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
# NOTE(review): the legend is displayed without a BGR->RGB conversion,
# while the detection overlay is converted before display — swatch hues
# may not match the drawn boxes on screen; confirm intended channel order.
plt.imshow(legend)
plt.title('Legend - Template Scales and Colors')
plt.axis('off')
plt.show()
# 6. Detection Statistics by Scale — bar chart of how many surviving
# detections each template scale produced (skipped when there are none).
if filtered_detections:
    scales_used = [det[5] for det in filtered_detections]
    unique_scales = sorted(set(scales_used))
    scale_counts = [scales_used.count(scale) for scale in unique_scales]
    plt.figure(figsize=(10, 6))
    # Create a simple colormap for the bars
    bar_colors = plt.cm.viridis(np.linspace(0, 1, len(unique_scales)))
    plt.bar([str(scale) for scale in unique_scales], scale_counts,
            color=bar_colors)
    plt.title('Detection Distribution by Template Scale')
    plt.xlabel('Template Scale')
    plt.ylabel('Number of Tuna Detected')
    plt.grid(axis='y', alpha=0.3)
    # Add values on bars
    for i, v in enumerate(scale_counts):
        plt.text(i, v + 0.1, str(v), ha='center', va='bottom')
    plt.show()
# --- DETAILED CONSOLE INFORMATION ---
print("\n" + "="*60)
print("DETAILED DETECTION REPORT")
print("="*60)
print(f"Total tuna detected: {len(filtered_detections)}")
print(f"Match threshold used: 0.475")
print(f"Template scale range: 0.4 - 2.0")
if filtered_detections:
    # Group the surviving detections by the template scale (index 5 of
    # each detection tuple) that produced them.
    print("\n--- DETECTIONS BY SCALE ---")
    scale_groups = {}
    for det in filtered_detections:
        scale = det[5]
        if scale not in scale_groups:
            scale_groups[scale] = []
        scale_groups[scale].append(det)
    for scale in sorted(scale_groups.keys()):
        count = len(scale_groups[scale])
        avg_score = np.mean([det[4] for det in scale_groups[scale]])
        print(f" Scale {scale}: {count} tuna (average score: {avg_score:.3f})")
    # Show at most the first eight detections in detail.
    print("\n--- DETECTION EXAMPLES ---")
    for i, (x, y, w, h, score, scale) in enumerate(filtered_detections[:8]):
        print(f" Tuna {i+1}: Pos=({x:3d}, {y:3d}), Size={w:3d}x{h:2d}, "
              f"Scale={scale:4.2f}, Score={score:.3f}")
    # General statistics
    avg_scale = np.mean([det[5] for det in filtered_detections])
    avg_score = np.mean([det[4] for det in filtered_detections])
    print(f"\n--- GENERAL STATISTICS ---")
    print(f"Average detection scale: {avg_scale:.3f}")
    print(f"Average match score: {avg_score:.3f}")
else:
    print("No tuna found. Try lowering the match threshold.")
Detecting tuna with multiple template scales... Found 17 tuna
Showing template pyramid (with relative sizes)...
============================================================ DETAILED DETECTION REPORT ============================================================ Total tuna detected: 17 Match threshold used: 0.475 Template scale range: 0.4 - 2.0 --- DETECTIONS BY SCALE --- Scale 0.4: 3 tuna (average score: 0.482) Scale 0.5: 1 tuna (average score: 0.513) Scale 0.6: 2 tuna (average score: 0.498) Scale 0.9: 1 tuna (average score: 0.478) Scale 1.25: 2 tuna (average score: 0.521) Scale 2.0: 8 tuna (average score: 0.551) --- DETECTION EXAMPLES --- Tuna 1: Pos=(2077, 504), Size=281x124, Scale=0.40, Score=0.476 Tuna 2: Pos=(1210, 1443), Size=281x124, Scale=0.40, Score=0.476 Tuna 3: Pos=(2310, 1446), Size=281x124, Scale=0.40, Score=0.494 Tuna 4: Pos=(363, 2246), Size=351x156, Scale=0.50, Score=0.513 Tuna 5: Pos=(2035, 2191), Size=421x187, Scale=0.60, Score=0.477 Tuna 6: Pos=(2166, 3007), Size=421x187, Scale=0.60, Score=0.520 Tuna 7: Pos=(156, 2192), Size=632x280, Scale=0.90, Score=0.478 Tuna 8: Pos=(362, 2853), Size=878x390, Scale=1.25, Score=0.538 --- GENERAL STATISTICS --- Average detection scale: 1.312 Average match score: 0.523
# Example 2: Image Pyramid (Fixed Template Scale) with NMS
#
# This example demonstrates object detection by scaling the main
# image (creating an image pyramid) and matching a single,
# fixed-size template against each scaled version.
#
# Non-Max Suppression (NMS) is applied afterward to filter and
# consolidate the raw detections, ensuring that overlapping
# boxes for the same object are resolved into a single, final detection.
#
# For demonstration purposes, scaled images from the pyramid are
# displayed intentionally, even if no detections were found at that
# particular scale.
#
# Note on visualization: When viewing the pyramid levels sequentially,
# the initial (largest) scales may appear similar as they often exceed
# the display window, causing them to be scaled down for viewing.
#
# Note on thresholding: Various detection thresholds were evaluated.
# The threshold selected represents a deliberate balance between
# detection efficacy and minimizing noise (false positives).
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Example 2 driver: match a fixed-size template against successively
# smaller versions of the scene (an image pyramid), then apply a global
# NMS across all scales. Everything runs inside one try block so missing
# image files produce a friendly message instead of a traceback.
# --- 1. Define paths and load images ---
intro_images_path = './Images/Intro_images/'
print(f"Looking for images in: {intro_images_path}")
try:
    # Load main image (scene)
    image_color = cv2.imread(intro_images_path + 'Tuna_Relative_Sizes.jpg')
    if image_color is None:
        raise FileNotFoundError(f"Could not load 'Tuna_Relative_Sizes.jpg' from {intro_images_path}")
    # Load template
    template_color = cv2.imread(intro_images_path + 'Tuna_template.jpg')
    if template_color is None:
        raise FileNotFoundError(f"Could not load 'Tuna_template.jpg' from {intro_images_path}")
    # Convert to grayscale for matching
    image_gray = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template_color, cv2.COLOR_BGR2GRAY)
    # Get template dimensions
    t_h, t_w = template_gray.shape
    print(f"Main image loaded (gray): {image_gray.shape}")
    print(f"Template loaded (gray): {template_gray.shape} (H, W)")
    # --- 2. Implement Multi-Scale Search (Image Pyramid) ---
    # List to save ALL detections (for final Global NMS)
    all_detections = []
    current_image = image_gray.copy()
    current_scale_factor = 1.0  # how much the current level is shrunk vs. the original
    PYRAMID_SCALE_FACTOR = 1.25
    MATCH_THRESHOLD = 0.7  # "Easy" threshold
    # IoU threshold for NMS (local and global)
    NMS_IOU_THRESHOLD = 0.3
    while True:
        # If current image is smaller than template, we can't search anymore.
        if current_image.shape[0] < t_h or current_image.shape[1] < t_w:
            print(f"Pyramid stopped: image ({current_image.shape}) smaller than template ({t_h, t_w}).")
            break
        # Calculate scale denominator (inverse representation)
        # NOTE(review): 1/scale_denominator equals current_scale_factor,
        # so the print below just formats the factor as a fraction.
        scale_denominator = 1.0 / current_scale_factor
        print(f"Processing scale 1/{1/scale_denominator:.2f} (factor: {current_scale_factor:.2f}), Image size: {current_image.shape}")
        # Perform template matching on current pyramid layer
        result = cv2.matchTemplate(current_image, template_gray, cv2.TM_CCOEFF_NORMED)
        # Find all locations that exceed the threshold
        locations = np.where(result >= MATCH_THRESHOLD)
        # ====================================================================
        # Logic for LOCAL NMS and visualization (at this scale)
        # ====================================================================
        # Lists for coordinates of THIS scale (for local NMS)
        scale_boxes_local = []  # Local coordinates (x, y, w, h)
        scale_scores_local = []  # Scores
        # Convert pyramid image to color to draw green boxes
        image_to_show = cv2.cvtColor(current_image, cv2.COLOR_GRAY2BGR)
        for pt in zip(*locations[::-1]):  # pt is (x, y)
            score = result[pt[1], pt[0]]
            # --- Local coordinates (for NMS of this scale) ---
            x_local = pt[0]
            y_local = pt[1]
            scale_boxes_local.append([x_local, y_local, t_w, t_h])
            scale_scores_local.append(score)
            # --- Global coordinates (for final NMS) ---
            # Map back to original image by undoing the pyramid shrink.
            orig_x1 = int(x_local * current_scale_factor)
            orig_y1 = int(y_local * current_scale_factor)
            orig_x2 = int((x_local + t_w) * current_scale_factor)
            orig_y2 = int((y_local + t_h) * current_scale_factor)
            # Add ALL raw detections to global list
            all_detections.append([orig_x1, orig_y1, orig_x2, orig_y2, score])
        # Apply local NMS (only for this scale visualization)
        indices_local = cv2.dnn.NMSBoxes(scale_boxes_local, scale_scores_local, MATCH_THRESHOLD, NMS_IOU_THRESHOLD)
        num_found_this_scale = 0
        # cv2.dnn.NMSBoxes returns an empty tuple when nothing survives.
        if isinstance(indices_local, np.ndarray):
            indices_local = indices_local.flatten()
            num_found_this_scale = len(indices_local)
            # Draw only boxes that survived local NMS
            for i in indices_local:
                box = scale_boxes_local[i]
                x, y, w, h = box
                cv2.rectangle(image_to_show, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Show pyramid image with its local detections
        h, w = current_image.shape
        plt.figure(figsize=(w * 0.01, h * 0.01))
        plt.imshow(cv2.cvtColor(image_to_show, cv2.COLOR_BGR2RGB))
        plt.title(f"Scale Layer: 1/{1/scale_denominator:.2f} | Detections (local): {num_found_this_scale}")
        plt.axis('off')
        plt.show()
        # ====================================================================
        # Reduce image for next pyramid level
        new_width = int(current_image.shape[1] / PYRAMID_SCALE_FACTOR)
        new_height = int(current_image.shape[0] / PYRAMID_SCALE_FACTOR)
        if new_width < t_w or new_height < t_h:
            break  # Next iteration would be too small
        current_image = cv2.resize(current_image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        current_scale_factor *= PYRAMID_SCALE_FACTOR
    print(f"Found {len(all_detections)} raw detections in TOTAL (before Global NMS).")
    # --- 3. Apply Global Non-Maximum Suppression (NMS) ---
    # NMSBoxes wants [x, y, w, h] boxes, so convert from corner pairs.
    boxes_for_nms = []
    scores_for_nms = []
    for (x1, y1, x2, y2, score) in all_detections:
        boxes_for_nms.append([x1, y1, x2 - x1, y2 - y1])  # [x, y, w, h]
        scores_for_nms.append(score)
    NMS_SCORE_THRESHOLD = 0.7  # Same confidence threshold
    if len(boxes_for_nms) > 0:
        # Global NMS: find best boxes among ALL scales
        indices = cv2.dnn.NMSBoxes(boxes_for_nms, scores_for_nms, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
        if isinstance(indices, tuple):
            indices = []
        else:
            indices = indices.flatten()
        print(f"Found {len(indices)} final detections (after Global NMS).")
        # --- 4. Draw results on original image ---
        final_image_with_boxes = image_color.copy()
        for i in indices:
            box = boxes_for_nms[i]
            x, y, w, h = box
            cv2.rectangle(final_image_with_boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)
            score_text = f"{scores_for_nms[i]:.2f}"
            cv2.putText(final_image_with_boxes, score_text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        # --- 5. Show FINAL result ---
        print("\n--- SHOWING FINAL COMBINED RESULT ---")
        plt.figure(figsize=(15, 10))
        plt.imshow(cv2.cvtColor(final_image_with_boxes, cv2.COLOR_BGR2RGB))
        plt.title(f'Final Result: {len(indices)} tuna detected (Global NMS)')
        plt.axis('off')
        plt.show()
    else:
        print("No detections found that exceeded thresholds.")
        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(image_color, cv2.COLOR_BGR2RGB))
        plt.title('No tuna found')
        plt.axis('off')
        plt.show()
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Please check that files 'Tuna_Relative_Sizes.jpg' and 'Tuna_template.jpg' exist")
    print(f"in folder: {intro_images_path}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
Looking for images in: ./Images/Intro_images/ Main image loaded (gray): (3564, 2971) Template loaded (gray): (312, 703) (H, W) Processing scale 1/1.00 (factor: 1.00), Image size: (3564, 2971)
Processing scale 1/1.25 (factor: 1.25), Image size: (2851, 2376)
Processing scale 1/1.56 (factor: 1.56), Image size: (2280, 1900)
Processing scale 1/1.95 (factor: 1.95), Image size: (1824, 1520)
Processing scale 1/2.44 (factor: 2.44), Image size: (1459, 1216)
Processing scale 1/3.05 (factor: 3.05), Image size: (1167, 972)
Processing scale 1/3.81 (factor: 3.81), Image size: (933, 777)
Found 5317 raw detections in TOTAL (before Global NMS). Found 7 final detections (after Global NMS). --- SHOWING FINAL COMBINED RESULT ---